﻿/* 
%include "/projects/hsieh_project/code_0_general/m_unit_status.sas" /source2;

Macros that determine the job creation "status" of a firm or an establishment.

----------------------------------------
The four basic statuses are:
1) Unit that enters between year1 and year2
2) Unit that exits between year1 and year2
3) Unit that has increasing or the same employment between year1 and year2
4) Unit that has decreasing employment between year1 and year2

----------------------------------------
There are two additional statuses that may be used in some cases.
5) Unit that exists in year1 and year2 but enters a new category in year2. 5 identifies the "sub"-unit of the unit that is in the new category.
6) Unit that exists in year1 and year2 but exits a previous category. 6 identifies the "sub"-unit of the unit that is in the previous category.
An example of the "category" above can be tradable vs non-tradable sectors.

Consider the following example where a firm expands to the tradable sector
year,city,firmnum,tradable,year_in,year_out,worker,status
1977,1,1,0,1977,2013,100,3
2013,1,1,0,1977,2013,200,3
2013,1,1,1,1977,2013,50,5

----------------------------------------
Common Parameters
pv_byone: variables by which unit's year in and year out are determined
pv_bytwo: additional variables by which unit's status is determined (only relevant for 5 and 6)
pv_unit: the unit id (e.g. firmnum, lbdid)
pv_status: the "root" of resulting status variable name

Note that a previous version of this macro (archive/m_unit_stat_by_var.sas) would require input data set to be at pv_byone pv_bytwo pv_unit year level.
This macro improved upon the previous version by adding a proc means and removing this requirement.
*/


/*
================================================================================
Core macro for unit status

dsi_in = input data set
dsi_out = output data set (if "&dsi_out." = "&dsi_in." or "", then add status columns to dsi_in.)

*/

%macro m_unit_status_core(dsi_in= , dsi_out= ,pv_byone= , pv_bytwo= , pv_unit= , pv_status= , p_year1= , p_year2= );

/*
Purpose
  Assign a job creation status code (1-6) to every unit / "sub"-unit for one
  pair of years and attach it to the data as the variable
  &pv_status._&p_year1._&p_year2.

Parameters
  dsi_in     input data set; the variables year and worker are read from it,
             together with &pv_unit. &pv_byone. &pv_bytwo.
  dsi_out    output data set; when blank or equal to dsi_in, the status column
             is added to dsi_in itself
  pv_byone   variables by which year_in and year_out of a unit are determined
  pv_bytwo   additional variables that define the "sub"-unit (status 5 and 6)
  pv_unit    the unit id variable (first. and last. are taken on it)
  pv_status  root of the name of the resulting status variable
  p_year1    initial year of the pair
  p_year2    terminal year of the pair

Side effects
  The WORK data sets dsi_status, dsi_status1, dsi_status2 and ds_year are
  created or overwritten.
  When the status is written back to dsi_in, dsi_in ends up sorted by
  &pv_unit. &pv_byone. &pv_bytwo. year.
*/

%local l_out;

%put --------------------------------------------------------------------------------;
%put m_unit_status_core;
%put dsi_in = &dsi_in.;
%put dsi_out = &dsi_out.;
%put pv_byone = &pv_byone.;
%put pv_bytwo = &pv_bytwo.;
%put pv_unit = &pv_unit.;
%put pv_status = &pv_status.;
%put p_year1 = &p_year1.;
%put p_year2 = &p_year2.;

/* Resolve the target data set: a blank dsi_out means "write back to dsi_in" */
%if %length(&dsi_out.) = 0 %then %let l_out = &dsi_in.;
%else %let l_out = &dsi_out.;

/* Only the two years of the pair are needed to determine the status */
data dsi_status;
  set &dsi_in.;
  if year in (&p_year1., &p_year2.);
run;

/* 
--------------------------------------------------------------------------------
Collapse to &pv_unit. &pv_byone. &pv_bytwo. year level
(the input may hold several rows per "sub"-unit and year; worker is summed)
*/
proc sort data=dsi_status;
  by &pv_unit. &pv_byone. &pv_bytwo. year;
run;

proc means data=dsi_status noprint;
  by &pv_unit. &pv_byone. &pv_bytwo. year;
  output out=dsi_status(drop=_type_ _freq_) sum(worker)=worker;
run;

/*
--------------------------------------------------------------------------------
Find year_in and year_out by &pv_unit. &pv_byone.
year_in = the first year (within the pair) in which the unit appears
year_out = the last year (within the pair) in which the unit appears

IMPORTANT: 
year_in and year_out are defined at &pv_unit. &pv_byone. level and do not involve &pv_bytwo.
Recall that we refer to the unit defined at &pv_unit. &pv_byone. &pv_bytwo. as "sub"-unit to make this distinction.
*/

proc sort data=dsi_status;
  by &pv_unit. &pv_byone. year;
run;

proc means data=dsi_status noprint;
  by &pv_unit. &pv_byone.;
  output out=ds_year(drop=_type_ _freq_) max(year)=year_out min(year)=year_in;
run;

data dsi_status;
  merge dsi_status ds_year;
  by &pv_unit. &pv_byone.;
  keep &pv_unit. &pv_byone. &pv_bytwo. year worker year_in year_out; /* Corrected 191211 */
run;

/* 
--------------------------------------------------------------------------------
Assign status code to units

Code for unit status
1: Units that enter in year2
2: Units that exit in year2
3: Units that increase or do not change employment in year2
4: Units that decrease employment in year2
5: Units that enter a new pv_bytwo in year2
6: Units that exit one of pv_bytwo in year2
*/

proc sort data=dsi_status;
  by &pv_byone. &pv_bytwo. &pv_unit. year;
run;

/*
Note that in the data step that immediately follows,
1 and 5 are only in year2 by definition
2 and 6 are only in year1 by definition
3 and 4 are only assigned in year2 (which we will deal with after this step)
*/

data dsi_status;
  set dsi_status;
  by &pv_byone. &pv_bytwo. &pv_unit.; /* Corrected the sequence 190614 */
  
  /*
  --------------------------------------
  1 & 2
  1) if year_in = terminal year, then new entrant
  2) if year_out = initial year, then exit
  */
  if year_in = &p_year2. then &pv_status. = 1;
  else if year_out = &p_year1. then &pv_status. = 2;
  
  /*
  --------------------------------------
  Calculate change in employment
  lag1 is called on every row on purpose: calling it conditionally would
  desynchronise its queue.
  */
  l1_worker = lag1(worker); /* Employment from the previous row */
  if first.&pv_unit. then l1_worker = .; /* First row of a "sub"-unit has no previous year */
  d_worker = worker - l1_worker; /* Change in employment (missing on the first row) */
  
  /*
  --------------------------------------
  3-6
  3) If the change is positive or zero, then increasing / unchanged
  4) If the change is negative, then decreasing
  5) If the "sub"-unit is still not assigned 1-4, and it first appears in year2, then the unit expands into a new pv_bytwo
  6) If the "sub"-unit is still not assigned 1-4, and it last appears in year1, then the unit exits a previous pv_bytwo
  Note that the "sub"-unit assigned 5 6 will not be assigned 1 or 2 because year_in year_out are defined at unit instead of "sub"-unit level.
  It will not be assigned 3 or 4 because d_worker is missing for it.
  */
  if &pv_status. = . & d_worker >= 0 & d_worker ~= . then &pv_status. = 3;
  else if &pv_status. = . & d_worker < 0 & d_worker ~= . then &pv_status. = 4;
  else if &pv_status. = . & first.&pv_unit. & year = &p_year2. then &pv_status. = 5;
  else if &pv_status. = . & last.&pv_unit. & year = &p_year1. then &pv_status. = 6;
run;
/* 
Remember that dsi_status is at &pv_byone. &pv_bytwo. &pv_unit. year level.
Note that 3 and 4 are only marked for year2.
The first. last. for 5 and 6 are likely to be redundant.
*/

/*
--------------------------------------------------------------------------------
Finalize the status definition
*/

/*
Units that exist in year1: status 2 and 6 (year1 rows) plus 3 and 4, whose
year2 row is re-labelled as year1 so that the year1 observation carries the status.
*/
data dsi_status1;
  set dsi_status(keep=&pv_unit. &pv_byone. &pv_bytwo. year &pv_status.);
  if &pv_status. in (1, 5) then delete;
  else if &pv_status. = . then delete; /* Drop year1 rows of 3 4, because their status is defined in year2, as mentioned above */
  year = &p_year1.;
  rename &pv_status.=&pv_status._&p_year1._&p_year2.; /* Rename the variable to include year1 and year2 */
run;

/*
Units that exist in year2: status 1, 5, 3 and 4 (all year2 rows).
*/
data dsi_status2;
  set dsi_status(keep=&pv_unit. &pv_byone. &pv_bytwo. year &pv_status.);
  if &pv_status. in (2, 6) then delete;
  else if &pv_status. = . then delete;  /* Drop year1 rows of 3 4, because their status is defined in year2, as mentioned above */
  year = &p_year2.;
  rename &pv_status.=&pv_status._&p_year1._&p_year2.; /* Rename the variable to include year1 and year2 */
run;

/*
All units. It can be confirmed that the total number of observations matches.
*/
data dsi_status;
  set dsi_status1 dsi_status2;
run;

/*
--------------------------------------------------------------------------------
Prepares output
*/

proc sort data=dsi_status;
  by &pv_unit. &pv_byone. &pv_bytwo. year;
run;

/*
Sort the input into the target so that the merge below never fails on an
unsorted input. When the target is dsi_in itself this is an in-place sort.
*/
proc sort data=&dsi_in. out=&l_out.;
  by &pv_unit. &pv_byone. &pv_bytwo. year;
run;

/*
Always merge the status column into the target. Previously a distinct dsi_out
received a plain copy of dsi_in without any status column.
*/
%put ----------------------------------------;
%put Merge &dsi_in. dsi_status;
%put Output &l_out.;
data &l_out.;
  merge &l_out. dsi_status;
  by &pv_unit. &pv_byone. &pv_bytwo. year;
run;

%mend m_unit_status_core;

/*
================================================================================
Macro that gets the status over a list of years

pl_year: The list of years to go through.
e.g. pl_year = 1977,1987,1997,2007
Then the job creation between the following year pairs will be calculated:
1977-1987,1987-1997,1997-2007,1977-2007.
*/

%macro m_unit_status(dsi_in= , dsi_out= , pv_byone= , pv_bytwo= , pv_unit= , pv_status= , pl_year= );

/*
Purpose
  Run m_unit_status_core for every consecutive pair of years in pl_year and,
  when the list holds more than two years, once more for the first-last pair.

Parameters
  dsi_in     input data set (sorted in place by &pv_unit. &pv_byone. &pv_bytwo. year)
  dsi_out    output data set; when blank or equal to dsi_in, the status columns
             are added to dsi_in itself
  pv_byone, pv_bytwo, pv_unit, pv_status
             passed through to m_unit_status_core
  pl_year    list of years, split with the default delimiters of the scan
             macro function (for example blanks or commas)

Notes
  All status columns are accumulated in one target data set: the input is
  copied to the target once and every core call then works in place on the
  target, so a later year pair does not discard the columns of earlier pairs.
*/

/* Keep the loop variables out of the scope of the caller */
%local i_list i_year1 i_year2 n_year l_out;

%put Now Determine Status;
%put Dataset &dsi_in.;
%put By &pv_unit. pv_byone=&pv_byone. pv_bytwo=&pv_bytwo.;

/* Count the years in the list */
%let n_year = 0;
%do %while (%length(%scan(%bquote(&pl_year.), %eval(&n_year. + 1))) > 0);
  %let n_year = %eval(&n_year. + 1);
%end;

/* A status needs a pair of years; with fewer there is nothing to compute */
%if &n_year. < 2 %then %do;
  %put WARNING: m_unit_status needs at least two years in pl_year but found &n_year.. No status computed.;
  %return;
%end;

/* Resolve the target data set: a blank dsi_out means "write back to dsi_in" */
%if %length(&dsi_out.) = 0 %then %let l_out = &dsi_in.;
%else %let l_out = &dsi_out.;

proc sort data=&dsi_in.;
  by &pv_unit. &pv_byone. &pv_bytwo. year;
run;

/* Start the target as a copy of the (sorted) input when it is a different data set */
%if "&l_out." ne "&dsi_in." %then %do;
data &l_out.;
  set &dsi_in.;
run;
%end;

/* Consecutive pairs: (1,2), (2,3), ..., (n-1,n) */
%do i_list = 2 %to &n_year.;
  %let i_year1 = %scan(%bquote(&pl_year.), %eval(&i_list. - 1));
  %let i_year2 = %scan(%bquote(&pl_year.), &i_list.);
  
  %m_unit_status_core(dsi_in=&l_out., dsi_out=&l_out., pv_byone=%bquote(&pv_byone.), pv_bytwo=%bquote(&pv_bytwo.), pv_unit=&pv_unit., pv_status=&pv_status., p_year1=&i_year1., p_year2=&i_year2.);
%end;

/* Overall pair (first,last); with exactly two years it equals the pair already done */
%if &n_year. > 2 %then %do;
  %let i_year1 = %scan(%bquote(&pl_year.), 1);
  %let i_year2 = %scan(%bquote(&pl_year.), &n_year.);
  
  %m_unit_status_core(dsi_in=&l_out., dsi_out=&l_out., pv_byone=%bquote(&pv_byone.), pv_bytwo=%bquote(&pv_bytwo.), pv_unit=&pv_unit., pv_status=&pv_status., p_year1=&i_year1., p_year2=&i_year2.);
%end;

%mend m_unit_status;

/* End of sas file */
